package com.wd.taobao;

import java.util.LinkedList;
import java.util.List;

import com.wd.live.platform.AbstractProcessor;
import com.wd.live.platform.MyFilePipeline;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectors;
import us.codecraft.webmagic.selector.XpathSelector;

/**
 * WebMagic page processor for a Tmall shop category listing.
 *
 * <p>For every {@code div} with class {@code item4line1} on the fetched page it
 * records one {@code String[3]} of {@code {name, price, sales}} and publishes
 * the collected rows on the page under the field {@value #RESULT_FIELD}.
 *
 * <p>NOTE(review): only a single value per field is recorded for each matched
 * {@code item4line1} fragment. If such a fragment can contain several products,
 * the remaining ones are not captured here - confirm against the page markup.
 */
public class LingMeiPageProcessor extends AbstractProcessor {

	/** Selects the inner HTML of each listing fragment on the page. */
	private static final String ITEM_XPATH = "//div[@class='item4line1']/html()";

	/** Selects the product name (link content) inside a listing fragment. */
	private static final String NAME_XPATH = "//dd[@class='detail']/a/html()";

	/** Selects the product price inside a listing fragment. */
	private static final String PRICE_XPATH =
			"//dd[@class='detail']/div[@class='attribute']/div[@class='cprice-area']/span[@class='c-price']/html()";

	/** Selects the sales figure inside a listing fragment. */
	private static final String SALES_XPATH =
			"//dd[@class='detail']/div[@class='attribute']/div[@class='sale-area']/span/html()";

	/** Category page the crawl starts from. */
	private static final String START_URL =
			"https://nmsm.tmall.com/category.htm?spm=a1z10.5-b.w4011-10560649048.1.3z9p05";

	/**
	 * Name of the page field the extracted rows are stored under. Kept exactly
	 * as it was; presumably the pipeline looks the rows up by this key.
	 */
	private static final String RESULT_FIELD = "listDate";

	/** Crawl configuration: retry times set to 3, sleep time set to 1000. */
	private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000);

	/**
	 * Extracts name, price and sales from every listing fragment of the page
	 * and stores the rows on the page under {@value #RESULT_FIELD}.
	 *
	 * @param page the downloaded page to extract from
	 */
	public void process(Page page) {

		List<String> fragments = page.getHtml().xpath(ITEM_XPATH).all();

		// The selectors do not depend on the fragment, so build them once per
		// page instead of once per loop iteration.
		XpathSelector nameSelector = Selectors.xpath(NAME_XPATH);
		XpathSelector priceSelector = Selectors.xpath(PRICE_XPATH);
		XpathSelector salesSelector = Selectors.xpath(SALES_XPATH);

		List<String[]> rows = new LinkedList<String[]>();
		for (String fragment : fragments) {
			String name = selectSingleValue(nameSelector, fragment);
			String price = selectSingleValue(priceSelector, fragment);
			String sales = selectSingleValue(salesSelector, fragment);

			// Only the price is trimmed, as before. The null guard keeps one
			// fragment without a price from failing the whole page.
			// NOTE(review): assumes Html.toString() can return null when the
			// selector matched nothing - confirm.
			String trimmedPrice = (price == null) ? null : price.trim();

			rows.add(new String[] { name, trimmedPrice, sales });
		}

		page.putField(RESULT_FIELD, rows);
	}

	/**
	 * Applies {@code selector} to an HTML fragment and returns the result as a
	 * single string, exactly as the selection's {@code Html} wrapper renders it.
	 *
	 * @param selector compiled XPath selector to apply
	 * @param fragment HTML text of one listing fragment
	 * @return the string form of the selection
	 */
	private static String selectSingleValue(XpathSelector selector, String fragment) {
		return new Html(selector.selectList(fragment), false).toString();
	}

	/**
	 * Returns the crawl configuration used by this processor.
	 *
	 * @return the site settings
	 */
	public Site getSite() {

		return site;
	}

	/**
	 * Runs the crawl: starts from the shop category page, writes results
	 * through a {@code MyFilePipeline} built from {@code FILE_PATH}, and uses
	 * 5 threads. Returns when {@code Spider.run()} returns.
	 */
	public void execute() {

		// Crawl with this instance rather than constructing a second,
		// identical processor.
		Spider.create(this)
				// start crawling from the shop category page
				.addUrl(START_URL)
				.addPipeline(new MyFilePipeline(FILE_PATH))
				// crawl with 5 threads
				.thread(5)
				// start the spider
				.run();
	}

	/**
	 * Command-line entry point; runs the crawl once.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {

		new LingMeiPageProcessor().execute();
	}
}